Grouping

library(curl)
## Using libcurl 8.4.0 with LibreSSL/3.3.6
Fires <- read.csv(curl("https://raw.githubusercontent.com/orrortega/statistics-natural-resources/main/data/Largest_fires_by_continent.csv"))
attach(Fires)

In grouping, the values of the first two variables are mapped to the x and y axes. Then additional variables are mapped to other visual characteristics such as color, shape, size, line type, and transparency. Grouping allows you to plot the data for multiple groups in a single graph.

library(ggplot2)
ggplot(Fires, 
      aes(x = speed..km.day.1., y=Size..km2.)) + 
      geom_point() +
      labs(title = "Size (km2) by speed (km/day)")

Next, let’s include the Regions, using color.

ggplot(Fires, 
      aes(x = speed..km.day.1., y=Size..km2., 
      color = dominant.spread.direction)) + geom_point() +
      labs(title = "Size (km2) by speed (km/day) and region")

Finally, let’s add the shape of the trees, using the shape of the points to indicate shape. We’ll increase the point size and add transparency to make the individual points clearer.

ggplot(Fires, 
      aes(x = speed..km.day.1., y=Size..km2., 
      color = dominant.spread.direction, shape = Region)) + geom_point() +
      labs(title = "Size (km2) by speed (km/day), region and dominant spread direction")

ggplot(Fires, 
  aes(x = speed..km.day.1., y=Size..km2., color = Region)) + 
  geom_point(alpha = .4, size = 3) +
  geom_smooth(se=FALSE, method = "lm", formula = y~poly(x,2), size = 1.5) + 
  labs(x = "Speed (km/day)", y = "Size (km2)", title = "Size (km2) by speed (km/day) and region", 
    subtitle = "Summary of Large Forest Fires", y = "", color = "Regions") +
    scale_color_brewer(palette = "Set1") + theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Grouping allows you to plot multiple variables in a single graph, using visual characteristics such as color, shape, and size.

In faceting, a graph consists of several separate plots or small multiples, one for each level of a third variable, or combination of variables. It is easiest to understand this with an example.

ggplot(Fires, aes(x = Size..km2.)) + geom_histogram(fill = "cornflowerblue",
    color = "white") + facet_wrap(~Region, ncol = 1) +
    labs(title = "Size (km2) by species")

The facet_wrap function creates a separate graph for each species. The ncol option controls the number of columns.

ggplot(Fires, aes(x = Size..km2.)) + 
  geom_histogram(fill = "cornflowerblue", color = "white") + 
  facet_wrap(Region ~ dominant.spread.direction) +
  labs(title = "Size histograms by Region")

The format of the facet_grid function is facet_grid( row variable(s) ~ column variable(s))

# plot total area by year, for each country
ggplot(plotdata, aes(x=year, y = total)) + geom_line(color="grey") + 
geom_point(color="blue") + facet_wrap(~Region) + theme_minimal(base_size = 9) + 
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
labs(title = "Changes in burned area",x = "Year",y = "Total area")

Maps

library(rworldmap) 
# get map
MyMap <- ggplot() + borders("world", colour="black", fill="grey") + theme_bw()
MyMap

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(viridis)
MyMap  + 
  geom_point(data = Fires, aes(x=lon, y=lat, color = Size..km2.),size = 2) +
  scale_color_viridis() +
  theme(panel.grid.major = element_line(color = "gray60", linetype = "dashed", size = 0.25), 
        panel.background = element_rect(fill = "aliceblue"))

##

library(viridis)
MyMap  + coord_fixed(xlim=c(-10, 37.5), ylim=c(30, 48)) + 
  geom_point(data = Fires, aes(x=lon, y=lat, color = Size..km2.),size = 2) +
  scale_color_viridis() +
  theme(panel.grid.major = element_line(color = "gray60", linetype = "dashed", size = 0.25), 
        panel.background = element_rect(fill = "aliceblue"))

MyMap  + coord_fixed(xlim=c(-10, 37.5), ylim=c(30, 48)) + 
  geom_point(data = Fires, aes(x=lon, y=lat, color = Size..km2.),size = 2) +
  scale_color_viridis() +
  facet_wrap(~year) +
  theme(panel.grid.major = element_line(color = "gray60", linetype = "dashed", size = 0.25), 
        panel.background = element_rect(fill = "aliceblue"))

Time-dependent graphs

Time series

ggplot(pm10subset, aes(x = day, y = pm10)) +
      geom_line() +
      labs(title = "Levels of pm10 during 2014", x = "Day of the year", y = "pm10 concentration")

library(scales)
ggplot(pm10subset, aes(x = day, y = pm10)) +
    geom_line(color = "indianred3", size=1 ) +
    geom_smooth() +  labs(title = "Pm10 levels", subtitle = "2014", x = "DOY", y = "pm10 concentration") +
    theme_minimal()

# multivariate time series
attach(pm10)
ggplot(pm10,
    aes(x=day , y= pm10, color=site)) + geom_line(size=1) + 
    labs(title = "pm10 levels London",
    subtitle = "2014", caption = "source: OpenAir", y = "pm10 concentration") +
    theme_minimal() + scale_color_brewer(palette = "Dark2")

Area Charts

ggplot(pm10subset, aes(x = day, y = pm10)) + 
  geom_area(fill="lightblue", color="black") + 
  labs(title = "pm10 levels", x = "day", y = "pm10 concentration")

A stacked area chart can be used to show differences between groups over time.

ggplot(pm10, aes(x = day, y = pm10, fill= site)) + 
  geom_area() + 
  labs(title = "pm10 levels", x = "day", y = "pm10 concentration")

Statistical models

Correlation plots

The levels of the site variable can be reversed using the fct_rev function in the forcats package.

df <- dplyr::select_if(FI_Murcia, is.numeric)
r <- cor(df, use="complete.obs") 
round(r,2)
##           Shape Height Diameter1 Diameter2
## Shape      1.00  -0.52     -0.22     -0.22
## Height    -0.52   1.00      0.67      0.67
## Diameter1 -0.22   0.67      1.00      0.99
## Diameter2 -0.22   0.67      0.99      1.00

library(ggcorrplot) 
ggcorrplot(r)

By default, zero count bars are dropped and the remaining bars are made wider. This may not be the behavior you want. You can modify this using the position = position_dodge(preserve = "single") option.

ggcorrplot(r,
            hc.order = TRUE,
            type = "lower", lab = TRUE)

Linear Regression

Height_lm <- lm(Height ~ Diameter1 + Diameter2, data = FI_Murcia)
library(visreg)
#The visreg function takes (1) the model and (2) the variable of interest and plots 
#the conditional relationship, controlling for the other variables. 
#The option gg = TRUE is used to produce a ggplot2 graph.
visreg(Height_lm, "Diameter1", gg = TRUE) +
  labs(title = "Relationship between Height and Diameter1",
  caption = "source:   NFI Murcia",
y = "Height (m)",
x = "Diameter (m)")

Logistic regression

visreg(Chrod_glm, "altitude",
    gg = TRUE,
    scale="response") + labs(y = "Prob(Presence)", x = "Altitude",
    title = "Relationship of Age and Presence")